Load api key for census.gov

This key is registered to "example@example.com"

api.key.install("OU812LONGHASH")

Find the codes for Illinois and Cook County

  1. Find the code
  2. Get the census tract boundary data (put into dfw)
  3. Simple plot of census tracts
lookup_code(state = "Illinois", county = "Cook")
## [1] "The code for Illinois is '17' and the code for Cook County is '031'."
dfw <- tracts(state = '17', county = c('031'))
plot(dfw)

The census data has the following:

  1. summary data / labels
  2. polygons (truncated)
  3. the “bounding box” coordinates
  4. details about the geometry assumptions (projection method)
str(dfw, 2)
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  1319 obs. of  12 variables:
##   ..@ polygons   :List of 1319
##   .. .. [list output truncated]
##   ..@ plotOrder  : int [1:1319] 695 770 855 124 775 49 632 1309 755 122 ...
##   ..@ bbox       : num [1:2, 1:2] -88.3 41.5 -87.1 42.2
##   .. ..- attr(*, "dimnames")=List of 2
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot

Download “B19013_001” from http://factfinder.census.gov/

Use acs.fetch to get B19013_001 (income data).

I’m not sure what causes the NA Warning… I think it might be the missing data in the census tract that appears to be Lake Michigan. As it turns out, Lake Michigan is very sparsely populated by humans (and humnas are the only animals included in the US census).

income_data <- acs.fetch(endyear = 2012, 
                         geography = geo.make(state = "IL", 
                                              county = c(31), 
                                              tract = "*"), 
                         variable = "B19013_001")
## Warning: NAs introduced by coercion

The income data includes a lot of metadata:

str(income_data)
## Formal class 'acs' [package "acs"] with 9 slots
##   ..@ endyear       : int 2012
##   ..@ span          : int 5
##   ..@ geography     :'data.frame':   1319 obs. of  4 variables:
##   .. ..$ NAME  : chr [1:1319] "Census Tract 101, Cook County, Illinois" "Census Tract 102.01, Cook County, Illinois" "Census Tract 102.02, Cook County, Illinois" "Census Tract 103, Cook County, Illinois" ...
##   .. ..$ state : int [1:1319] 17 17 17 17 17 17 17 17 17 17 ...
##   .. ..$ county: int [1:1319] 31 31 31 31 31 31 31 31 31 31 ...
##   .. ..$ tract : chr [1:1319] "010100" "010201" "010202" "010300" ...
##   ..@ acs.colnames  : chr "B19013_001"
##   ..@ modified      : logi FALSE
##   ..@ acs.units     : Factor w/ 5 levels "count","dollars",..: 1
##   ..@ currency.year : int 2012
##   ..@ estimate      : num [1:1319, 1] 31063 38766 36369 41315 43125 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:1319] "Census Tract 101, Cook County, Illinois" "Census Tract 102.01, Cook County, Illinois" "Census Tract 102.02, Cook County, Illinois" "Census Tract 103, Cook County, Illinois" ...
##   .. .. ..$ : chr "B19013_001"
##   ..@ standard.error: num [1:1319, 1] 1878 3632 10489 6226 5308 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:1319] "Census Tract 101, Cook County, Illinois" "Census Tract 102.01, Cook County, Illinois" "Census Tract 102.02, Cook County, Illinois" "Census Tract 103, Cook County, Illinois" ...
##   .. .. ..$ : chr "B19013_001"

Convert Census Data To data.frame

GEOID is a concatination of state, county, census tract. The county has to be 3 characters, padded by leading zeros (hence the sprintf statement). In the original NY example the county was already 3 characters long and so it wasn’t necessary to pad with zeros… pretty tricky to figure that out.

income_df <- data.frame(GEOID = paste0(sprintf("%02.f", income_data@geography$state), 
                                       sprintf("%03.f", income_data@geography$county), 
                                       income_data@geography$tract), 
                        hhincome = income_data@estimate[,1], 
                        stringsAsFactors=F)
# str(income_df)

Clean up some NA values:

geneorama::NAsummary(income_df)
##          col Count nNA    rNA nUnique rUnique
## GEOID      1  1319   0 0.0000    1319  1.0000
## hhincome   2  1319   5 0.0037    1282  0.9719
income_df[is.na(income_df$hhincome),"hhincome"] <- 0

Merge Map and Census Data

Thanks to the sprintf statement (earlier) the GEOIDs are in the same format, and can be merged.

head(dfw@data$GEOID)
## [1] "17031030101" "17031030701" "17031070103" "17031807100" "17031807200" "17031807300"
head(income_df$GEOID)
## [1] "17031010100" "17031010201" "17031010202" "17031010300" "17031010400" "17031010501"
geneorama::inin(dfw@data$GEOID, income_df$GEOID)
##                                                    inin.summary
## Items in dfw@data$GEOID                                    1319
## Items in income_df$GEOID                                   1319
## Items in dfw@data$GEOID and income_df$GEOID                1319
## Items in dfw@data$GEOID but not in income_df$GEOID           NA
## Items in income_df$GEOID but not in dfw@data$GEOID           NA
dfw_merged <- geo_join(dfw, income_df, "GEOID", "GEOID")

Plot

Choose a color palette Define the pop up text Plot using leaflet (this example uses the %>% data flow paradigm)

pal <- colorQuantile("Greens", NULL, n = 6)
popup <- paste0("Median household income: ", as.character(dfw_merged$hhincome))
leaflet() %>%
    addProviderTiles("CartoDB.Positron") %>%
    addPolygons(data = dfw_merged, 
                fillColor = ~pal(dfw_merged$hhincome), 
                fillOpacity = 0.7, 
                weight = 0.2, 
                popup = popup) %>%
    addLegend(pal = pal, 
              values = dfw_merged$hhincome, 
              position = "bottomright", 
              title = "Income in DFW")